Setup libraries

library(tidyverse)
library(terra)
library(MCMCvis)

# ggplot theme set
theme_set(theme_bw())

setup data

wd <- "/Users/elaw/Desktop/LeuphanaProject/BirdModelling/ETH_birds"
results_folder <- paste0(wd, "/Results/")  
version_folder <- "v10/"
mydate <- 20221207
samples <- readRDS(paste0(results_folder, version_folder, "BirdOccMod_dOcc_samples_forest_", mydate, "_seed101.RDS"))
data_input <- readRDS(paste0(wd, "/Data/Forest_nimbleOccData_v6_", mydate, ".RDS"))
list2env(data_input[[1]], envir = .GlobalEnv); list2env(data_input[[2]], envir = .GlobalEnv); rm(data_input)
## <environment: R_GlobalEnv>
## <environment: R_GlobalEnv>
rast_folder <- "/Users/elaw/Desktop/LeuphanaProject/ETH_SpatialData/data_10m/Baseline/"
forest_stack <- rast(paste0(rast_folder, "forest_stack_10m_Baseline.tif"))
pred_stack <- rast(paste0(results_folder, version_folder, "forest_pred_stack_10m_Baseline.tif"))

rast_vals <- values(pred_stack, na.rm=TRUE) %>% as_tibble()
site_vals <- as_tibble(Xoc); names(site_vals) <- names(rast_vals)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.

site vars vs raster vars

summarise_all(rast_vals, range) %>% t()
##                  [,1]     [,2]
## elevation   -2.630699 3.996370
## frdis       -2.053845 6.662770
## slope       -3.710781 4.578314
## forest_type  0.000000 1.000000
summarise_all(site_vals, range) %>% t()
##                  [,1]     [,2]
## elevation   -1.534821 2.162803
## frdis       -1.618772 2.096995
## slope       -2.319035 2.051561
## forest_type  0.000000 1.000000

Plot histograms

Site sampled variables (green) are typically a biased sub-section of the range of variables in the rasters (purple). This is particularly the case for Forest distance (to forest edge), due to the challenges of access to sampling in the forest interior.

plot_hists <- function(myvar, binwidth=0.25){
  mc <- viridis::viridis(3)
  ggplot(rast_vals, aes(x={{myvar}}, y = stat(density))) +
    geom_histogram(binwidth = binwidth, fill=mc[1], alpha = 0.5) + 
    geom_histogram(data = site_vals, binwidth = binwidth, fill=mc[2], alpha = 0.5)
}

plot_hists(elevation)

plot_hists(frdis)

plot_hists(slope)

plot_hists(forest_type, binwidth = 1)

Examine betalpsi parameters from each species

# select a reasonable set of samples 
mydraw <- seq(0, dim(samples$chain1)[1], 1000)
mysamples <- list(
  chain1 = samples$chain1[mydraw, ],
  chain2 = samples$chain2[mydraw, ],
  chain3 = samples$chain3[mydraw, ]
)

# extract betas  # 1=elevation 2=fr_dis  3=slope 4=forest_type
b0 <- MCMCsummary(mysamples, 
                  params = 'lpsi\\[\\d{1,3}\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b1 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][1]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b2 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][2]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b3 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][3]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
b4 <- MCMCsummary(mysamples, 
                  params = 'betalpsi\\[\\d{1,3}[,][ ][4]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)

tibble(
  ElevationMean = sum(b1$mean > 0), ElevationMedian = sum(b1$`50%`>0),
  ForestDistanceMean = sum(b2$mean > 0), ForestDistanceMedian = sum(b2$`50%`>0),
  SlopeMean = sum(b3$mean > 0), SlopeMedian = sum(b3$`50%`>0),
  ForestTypeMean = sum(b4$mean > 0), ForestTypeMedian = sum(b4$`50%`>0)
) %>% t %>% 
  as_tibble(rownames = "SummaryType") %>% 
  rename(Positive=V1) %>% 
  mutate(Negative=M-Positive)
## # A tibble: 8 × 3
##   SummaryType          Positive Negative
##   <chr>                   <int>    <dbl>
## 1 ElevationMean              92       93
## 2 ElevationMedian            88       97
## 3 ForestDistanceMean        105       80
## 4 ForestDistanceMedian      106       79
## 5 SlopeMean                  46      139
## 6 SlopeMedian                50      135
## 7 ForestTypeMean            185        0
## 8 ForestTypeMedian          185        0

plot the betalpsi params

plotbeta <- function(bp, bname, fgroup=NULL, fgroupName){
  bp <- bind_cols(bp, 
                  fspp=c(fspp, rep("unknown", Nadd)), 
                  mig=c(mig, rep("unknown", Nadd)),
                  fnDiet=c(fnDiet, rep("unknown", Nadd)),
                  invDiet=c(invDiet, rep("unknown", Nadd)))
  ggplot(arrange(bp,`50%`), 
         aes(x=`50%`, xmin=`2.5%`, xmax=`97.5%`,
             y=1:185))+
    geom_linerange(aes(color={{fgroup}}))+
    geom_point(aes(x=mean), color="darkgrey")+
    geom_point(aes(color={{fgroup}}))+
    geom_vline(xintercept=0, color="red")+
    scale_color_viridis_d()+
    labs(x=paste0(bname," beta parameter:\n95% HDI (lines), mean (grey points), median (color points)"),
         y="Species, sorted by median beta",
         color = fgroupName)+
    theme(axis.text.y=element_blank(),
          axis.ticks.y=element_blank())
}

plotbeta(b1, "Elevation", fspp, "Forest species")

plotbeta(b2, "Forest distance", fspp, "Forest species")

plotbeta(b3, "Slope", fspp, "Forest species")

plotbeta(b4, "Forest type", fspp, "Forest species")

plotbeta(b1, "Elevation", mig, "Migratory species")

plotbeta(b2, "Forest distance", mig, "Migratory species")

plotbeta(b3, "Slope", mig, "Migratory species")

plotbeta(b4, "Forest type", mig, "Migratory species")

plot beta relationships

newdata <- tibble(
  elevation = seq(min(rast_vals$elevation), max(rast_vals$elevation), length.out=100) %>% rep(2),
  mElevation = mean(rast_vals$elevation),
  frdis = seq(min(rast_vals$frdis), max(rast_vals$frdis), length.out=100) %>% rep(2),
  mFrdis = mean(rast_vals$frdis),
  slope = seq(min(rast_vals$slope), max(rast_vals$slope), length.out=100) %>% rep(2),
  mSlope = mean(rast_vals$slope),
  foresttype = rep(0:1, each = 100)
)

elev_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$elevation + 
    b2$mean[k] * newdata$mFrdis +
    b3$mean[k] * newdata$mSlope +
    b4$mean[k] * newdata$foresttype 
  
  elev_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

frdis_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$mElevation + 
    b2$mean[k] * newdata$frdis +
    b3$mean[k] * newdata$mSlope +
    b4$mean[k] * newdata$foresttype 
  
  frdis_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

slope_psi <- matrix(data=NA, nrow = nrow(newdata), ncol = M)
for(k in 1:M){
  logitpsi <- b0$mean[k]  +
    b1$mean[k] * newdata$mElevation + 
    b2$mean[k] * newdata$mFrdis +
    b3$mean[k] * newdata$slope +
    b4$mean[k] * newdata$foresttype 
  
  slope_psi[,k] <- exp(logitpsi)/(exp(logitpsi) + 1)
}

out_psi <- elev_psi %>% 
  as_tibble() %>% 
  bind_cols(newdata) %>% 
  pivot_longer(cols = V1:V185, names_to = "species", values_to = "elev_psi") %>% 
  mutate(species = str_replace(species, "V", "sp"),
         species = factor(species, levels = paste0('sp',1:185))) %>% 
  bind_cols(
    frdis_psi %>% 
      as_tibble() %>% 
      pivot_longer(cols = V1:V185, names_to = "species", values_to = "frdis_psi") %>% 
      select(-species)
  ) %>% 
  bind_cols(
    slope_psi %>% 
      as_tibble() %>% 
      pivot_longer(cols = V1:V185, names_to = "species", values_to = "slope_psi") %>% 
      select(-species)
  ) %>% 
  add_column(
    b1 = rep(b1$mean, 200),
    b2 = rep(b2$mean, 200),
    b3 = rep(b3$mean, 200),
    b4 = rep(b4$mean, 200),
    fspp = rep(c(fspp, rep(FALSE, Nadd)),200), 
    fnDiet=rep(c(fnDiet, rep(FALSE, Nadd)),200), 
    invDiet=rep(c(invDiet, rep(FALSE, Nadd)),200),
    observed=rep(c(rep("observed", Nobs), rep("absent", Nadd)), 200)
  ) %>% 
  mutate(fspp = if_else(fspp==TRUE, "fspp", "other"),
         fnDiet = if_else(fnDiet==TRUE, "fnDiet", "other"),
         invDiet = if_else(invDiet==TRUE, "invDiet", "other"))

plotpsi <- function(xvar, yvar, xname, yname, oxmin=-Inf, oxmax=Inf){
  ggplot(out_psi, 
         aes(x={{xvar}}, y={{yvar}}, color=species)) +
    # add rectangles showing non-sampled projection area
    annotate("rect", xmin = -Inf, xmax = oxmin, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    annotate("rect", xmin = oxmax, xmax = Inf, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    geom_line(alpha=0.5) + 
    scale_color_viridis_d()+
    theme(legend.position = "none") +
    facet_grid(.~foresttype, labeller =label_both)+
    labs(x=xname, y=yname)
}

plotpsi(elevation, elev_psi, 
        "Elevation (center scaled, other vars at mean)", "psi", 
        min(site_vals$elevation), max(site_vals$elevation))

plotpsi(frdis, frdis_psi, 
        "Forest distance (center scaled, other vars at mean)", "psi", 
        min(site_vals$frdis), max(site_vals$frdis))

plotpsi(slope, slope_psi, 
        "Slope (center scaled, other vars at mean)", "psi", 
        min(site_vals$slope), max(site_vals$slope))

Sum species over the parameters

plotSR <- function(xvar, y, xname, yname, oxmin=-Inf, oxmax=Inf){
  bind_cols(
    newdata,
    SR = rowSums(y),
    SRfspp = rowSums(y[,fspp]),
    SRmig = rowSums(y[,mig]),
    SRfnDiet = rowSums(y[,fnDiet]),
    SRinvDiet = rowSums(y[,invDiet])
  ) %>% 
    pivot_longer(cols = SR:SRinvDiet, names_to = "SpeciesSelection", values_to = "SR") %>% 
    mutate(foresttype = factor(foresttype)) %>% 
  ggplot(., 
         aes(x={{xvar}}, y=SR, color=SpeciesSelection, lty=foresttype), lwd=2) +
    # add rectangles showing non-sampled projection area
    annotate("rect", xmin = -Inf, xmax = oxmin, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    annotate("rect", xmin = oxmax, xmax = Inf, ymin = -Inf, ymax = Inf, 
             alpha =0.3)+
    geom_line() + 
    scale_color_viridis_d()+
    scale_linetype_manual(values = c("dashed", "solid"))+
    labs(x=xname, y=yname)
}

plotSR(elevation, elev_psi, 
        "Elevation (center scaled, other vars at mean)", "Species Richness (sum PSI)", 
        min(site_vals$elevation), max(site_vals$elevation))

plotSR(frdis, frdis_psi, 
        "Forest distance (center scaled, other vars at mean)","Species Richness (sum PSI)", 
        min(site_vals$frdis), max(site_vals$frdis))

plotSR(slope, slope_psi, 
        "Slope (center scaled, other vars at mean)", "Species Richness (sum PSI)", 
        min(site_vals$slope), max(site_vals$slope))

Probability of detection

MCMCsummary(mysamples, 
                  params = "betalp",  
                  round = 2)
##            mean   sd  2.5%   50% 97.5% Rhat n.eff
## betalp[1]  0.35 0.05  0.27  0.36  0.40 1.13    21
## betalp[2] -0.11 0.04 -0.15 -0.11 -0.03 1.41    20
## betalp[3] -0.13 0.05 -0.23 -0.13 -0.07 0.82    12
# extract betas for detection
d0 <- MCMCsummary(mysamples, 
                  params = "lp",  
                  round = 2)
d1 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[1]\\]', ISB = FALSE, exact = FALSE, 
                  round = 2)
d2 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[2]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)
d3 <- MCMCsummary(mysamples, 
                  params = 'betalp\\[[3]\\]', ISB = FALSE, exact = FALSE,  
                  round = 2)

logitp_v1 <- matrix(d0$mean, length(d0$mean), dim(Xob)[1])  +
  d1$mean * Xob[,1,1] + 
  d2$mean * Xob[,1,2] +
  d3$mean * Xob[,1,3] 

logitp_v2 <- matrix(d0$mean, length(d0$mean), dim(Xob)[1])  +
  d1$mean * Xob[,2,1] + 
  d2$mean * Xob[,2,2] +
  d3$mean * Xob[,2,3] 

p_v1 <- exp(logitp_v1)/(exp(logitp_v1) + 1)
p_v2 <- exp(logitp_v2)/(exp(logitp_v2) + 1) 

# mean probability of detection, all species, all sites, on visit 1 and visit 2
mean(p_v1); mean(p_v2)
## [1] 0.04075618
## [1] 0.05965252
# plot probability of detection
tibble(
  mean_p_v1 = rowMeans(p_v1),
  mean_p_v2 = rowMeans(p_v2)
) %>% 
  arrange(-mean_p_v2, -mean_p_v1) %>% 
  add_column(species = 1:M) %>% 
  ggplot(., aes(x=species, ymin=mean_p_v1, ymax=mean_p_v2)) +
  geom_hline(yintercept=mean(p_v1), color="red", lty="dotted") +
  geom_hline(yintercept=mean(p_v2), color="red") +
  geom_linerange() +
  geom_point(aes(y=mean_p_v1), pch=1) +
  geom_point(aes(y=mean_p_v2), color="black") +
  labs(x="Species, sorted by probability of detection (visit 2)", 
       y="Mean probability of detection over all sites\nOpen circles = visit 1; Closed circles = visit 2")